For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Sites

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all[-c(1:2)]))
##   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr    Mn    Fe 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Mo    Rh    Ag    Sn 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ba    La    Ta    Pb    Bi 
##     0     0     0     0     0

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Ba) %>% 
        select(-MgO) %>% 
        select(-Cu)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
##  [1] "Ti" "Cr" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr" "Mo" "Rh" "Ag" "Sn" "La" "Ta"
## [16] "Pb" "Bi"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:10])
## [1] "Ti" "Cr" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.2410 1.0139 0.9257 0.68360 0.51308 0.46081 0.30685
## Proportion of Variance 0.6277 0.1285 0.1071 0.05841 0.03291 0.02654 0.01177
## Cumulative Proportion  0.6277 0.7562 0.8634 0.92179 0.95469 0.98124 0.99301
##                            PC8
## Standard deviation     0.23654
## Proportion of Variance 0.00699
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_final[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas (Copper included)")
    rect.dendrogram(dend, 7, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend3 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    type <- as.factor(pxrf_final[, 2])
    n_type <- length(unique(type))
    cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
    col_type <- cols_t[type]
    labels_colors(dend3) <- col_type[order.dendrogram(dend3)]

    plot(dend3, main="HCA with sample types (NO COPPER)")
    rect.dendrogram(dend3, 6, border = "Black", lty = 5)
    legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Removing duplicates for clarity
loi <- subset(loi[1:47, ])
tga <- subset(tga[-c(18,20,47,49), ])

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##                  context        type     c550      c950
## AY-1    Area D acropolis    mudbrick 2.088275 4.8891890
## AY-2    Area D acropolis    mudbrick 2.210488 1.9841768
## AY-3    Area D acropolis    mudbrick 1.360910 2.2488113
## AY-4    Area D acropolis    mudbrick 1.430588 1.8964316
## AY-5    Area D acropolis    mudbrick 1.338035 3.3256238
## AY-6    Area D acropolis mud plaster 1.480312 1.2721627
## AY-7    Area D acropolis    mudbrick 1.430335 3.1305245
## AY-8    Area D acropolis mud plaster 1.649521 1.9801623
## AY-9    Area D acropolis    mudbrick 2.368712 3.9045130
## AY-10   Area D acropolis    mudbrick 1.049666 3.1941032
## AY-11   Area D acropolis    mudbrick 1.082732 1.9786697
## AY-12   Area D acropolis         PEM 1.183502 0.7730823
## AY-13   Area D acropolis    mudbrick 1.389532 4.5525165
## AY-14   Area D acropolis    mudbrick 1.787980 2.7523295
## AY-15   Area D acropolis    mudbrick 2.233289 2.7459408
## AY-16   Area D acropolis    mudbrick 1.026364 0.9583722
## AY-17   Area D acropolis    mudbrick 1.569395 3.5356312
## AY-18   Area D acropolis    mudbrick 1.770410 2.6403398
## AY-19   Area D acropolis    mudbrick 1.454623 3.3395017
## AY-20   Area D acropolis mud plaster 1.593025 1.5216091
## AY-21   Area D acropolis    mudbrick 1.729351 1.7560317
## AY-22        Area D wall    mudbrick 1.280392 1.5344272
## AY-23        Area D wall    mudbrick 1.782016 1.9743301
## AY-24        Area D wall    mudbrick 1.697649 1.1877784
## AY-25        Area D wall    mudbrick 1.528936 2.7365019
## AY-26        Area D wall    mudbrick 1.802970 3.6061442
## AY-27        Area D wall mud plaster 1.885674 1.5169278
## AY-28        Area D wall    mudbrick 1.399589 0.7854046
## AY-29        Area D wall mud plaster 1.855100 2.0634749
## AY-30        Area D wall    mudbrick 2.552046 3.4633045
## AY-31        Area D wall    mudbrick 1.455348 3.4109094
## AY-32        Area D wall    mudbrick 1.244529 1.4577882
## AY-33        Area D wall    mudbrick 1.300813 0.8936149
## AY-34        Area D wall    mudbrick 1.581909 1.7044229
## AY-35        Area D wall    mudbrick 1.931867 2.6061513
## AY-36        Area D wall    mudbrick 2.422288 2.5205456
## AY-37        Area D wall    mudbrick 1.139581 1.0607829
## AY-38        Area D wall    mudbrick 2.130927 1.4162417
## AY-39        Area D wall  mud mortar 1.904037 2.5000000
## AY-40        Area D wall mud plaster 2.482920 2.5301253
## AY-41        Area D wall    mudbrick 2.565292 3.7553575
## AY-42 Hellenistic area A    mudbrick 1.593653 1.9123424
## AY-50    Builder's floor         PEM 1.012178 1.5457741
## AY-51    Builder's floor         PEM 1.212985 1.2668590
## AY-52    Builder's floor         PEM 1.763143 2.3906130
## AY-53    Builder's floor         PEM 1.402188 8.0417546
## AY-54    Builder's floor         PEM 1.843655 2.9683284

TGA dataset:

tga[c(8:9,10:11)]
##                  context        type     c550      c950
## AY-1    Area D acropolis    mudbrick 2.398013 2.4471417
## AY-2    Area D acropolis    mudbrick 1.736874 7.9947176
## AY-4    Area D acropolis    mudbrick 1.929141 1.4280310
## AY-3    Area D acropolis    mudbrick 1.233825 3.4734918
## AY-5    Area D acropolis    mudbrick 1.644479 3.5987261
## AY-7    Area D acropolis    mudbrick 1.505973 3.4082173
## AY-8    Area D acropolis mud plaster 1.558115 3.6606406
## AY-9    Area D acropolis    mudbrick 1.927070 3.2099602
## AY-10   Area D acropolis    mudbrick 1.176203 3.8700760
## AY-11   Area D acropolis    mudbrick 1.248990 1.3615058
## AY-12   Area D acropolis         PEM 1.274945 0.7860752
## AY-13   Area D acropolis    mudbrick 1.281796 3.7069429
## AY-14   Area D acropolis    mudbrick 1.971081 2.2591414
## AY-15   Area D acropolis    mudbrick 2.647777 2.4588519
## AY-16   Area D acropolis    mudbrick 1.329581 3.4621816
## AY-17   Area D acropolis    mudbrick 1.631206 4.5833762
## AY-18   Area D acropolis    mudbrick 1.698302 3.6382114
## AY-19   Area D acropolis    mudbrick 1.874606 3.2830310
## AY-20   Area D acropolis mud plaster 1.877608 2.0755290
## AY-21   Area D acropolis    mudbrick 1.413217 1.8372703
## AY-22        Area D wall    mudbrick 2.099634 2.0131512
## AY-23        Area D wall    mudbrick 1.749414 1.3528300
## AY-24        Area D wall    mudbrick 1.711251 1.4208525
## AY-25        Area D wall    mudbrick 1.938736 1.9671807
## AY-26        Area D wall    mudbrick 1.663242 3.9001094
## AY-27        Area D wall mud plaster 1.696577 1.6560255
## AY-28        Area D wall    mudbrick 1.259601 0.7233976
## AY-29        Area D wall mud plaster 1.724623 3.9961850
## AY-30        Area D wall    mudbrick 2.586865 3.2631063
## AY-31        Area D wall    mudbrick 1.343570 3.0447471
## AY-32        Area D wall    mudbrick 1.336629 1.4660852
## AY-33        Area D wall    mudbrick 1.268116 0.8460754
## AY-34        Area D wall    mudbrick 1.692888 1.4506317
## AY-35        Area D wall    mudbrick 2.002379 2.3973296
## AY-6    Area D acropolis mud plaster 1.659626 1.2887389
## AY-36        Area D wall    mudbrick 2.500488 2.3943098
## AY-37        Area D wall    mudbrick 1.077605 0.7994378
## AY-38        Area D wall    mudbrick 2.164785 1.6976633
## AY-39        Area D wall  mud mortar 1.915888 2.8108623
## AY-40        Area D wall mud plaster 2.618731 2.4977211
## AY-42 Hellenistic area A    mudbrick 1.655597 1.5855926
## AY-50    Builder's floor         PEM 1.347992 1.5505378
## AY-51    Builder's floor         PEM 1.282281 1.6713598
## AY-52    Builder's floor         PEM 1.492264 2.2490706
## AY-53    Builder's floor         PEM 1.276679 5.4590326
## AY-54    Builder's floor         PEM 1.651196 3.0674290

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI",
        x ="Temperature", y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All possible elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all[-c(1:2)]))
##   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr    Mn    Fe 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Mo    Ag    Sn    Ba 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ta    Pb 
##     0     0

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Pb) %>% 
        select(-MgO) %>% 
        select(-Cu)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
##  [1] "Ti" "V"  "Cr" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr" "Nb" "Mo" "Ag" "Sn" "Ba"
## [16] "Ta"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:10])
## [1] "Ti" "V"  "Cr" "Mn" "Fe" "Zn" "Rb" "Sr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6       PC7
## Standard deviation     2.1512 1.4211 0.8593 0.55694 0.54283 0.09815 9.831e-17
## Proportion of Variance 0.5784 0.2525 0.0923 0.03877 0.03683 0.00120 0.000e+00
## Cumulative Proportion  0.5784 0.8309 0.9232 0.96196 0.99880 1.00000 1.000e+00
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam Byzantine elements")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam Byzantine grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 7 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by type:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    type <- as.factor(pxrf_final[, 2])
    n_type <- length(unique(type))
    cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
    col_type <- cols_t[type]
    labels_colors(dend) <- col_type[order.dendrogram(dend)]

    plot(dend, main="HCA with sample types (Copper included)")
    rect.dendrogram(dend, 5, border = "Black", lty = 5)
    legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##         context        type      c550      c950
## AY-43 Byzantine     ceiling 2.8231692 13.606389
## AY-44 Byzantine       floor 3.1549254  4.143032
## AY-45 Byzantine PEM (burnt) 9.7500231  6.336877
## AY-46 Byzantine         PEM 3.0153992  1.087679
## AY-47 Byzantine     ceiling 2.2356517  6.463715
## AY-48 Byzantine     ceiling 1.8207856 15.544905
## AY-49 Byzantine         PEM 0.4835757 32.404290

TGA dataset:

tga[c(8:9,10:11)]
##         context        type      c550      c950
## AY-43 Byzantine     ceiling 3.2957418 12.043832
## AY-44 Byzantine       floor 4.3270582  8.653940
## AY-45 Byzantine PEM (burnt) 6.2941554  9.781651
## AY-46 Byzantine         PEM 3.8585514  6.813924
## AY-47 Byzantine     ceiling 2.0075863  7.902190
## AY-48 Byzantine     ceiling 1.8409714 11.731844
## AY-49 Byzantine         PEM 0.3230349 29.724115

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Traditional LOI",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI",
        x ="Temperature", y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by type", 
           color = "Type",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by type", 
           color = "Type",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all[-c(1:2)]))
##   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr    Mn    Fe 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ni    Cu    Zn    As    Se    Rb    Sr     Y    Zr    Nb    Mo    Ag    Sn 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ba    La 
##     0     0

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-Nb) %>% 
        select(-MgO) %>% 
        select(-Cu)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
##  [1] "Ti" "V"  "Cr" "Mn" "Fe" "Zn" "As" "Se" "Rb" "Sr" "Y"  "Zr" "Mo" "Ag" "Sn"
## [16] "Ba" "La"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no floor or lime plaster)

pxrf_MB <- pxrf_final[-c(1:7), ]
rownames(pxrf_MB)
##  [1] "AH-4"  "AH-5"  "RLZ-1" "TI-1"  "TI-10" "TI-2"  "TI-3"  "TI-4"  "TI-5" 
## [10] "TI-6"  "TI-7"  "TI-8"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:10])
## [1] "Ti" "V"  "Cr" "Mn" "Fe" "Zn" "As" "Se"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.8541 1.1938 1.0027 0.9184 0.89140 0.57933 0.29452
## Proportion of Variance 0.4297 0.1781 0.1257 0.1054 0.09933 0.04195 0.01084
## Cumulative Proportion  0.4297 0.6078 0.7335 0.8389 0.93827 0.98022 0.99106
##                            PC8
## Standard deviation     0.26736
## Proportion of Variance 0.00894
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with only mudbrick samples:

# Elements
colnames(pxrf_MB[3:10])
## [1] "Ti" "V"  "Cr" "Mn" "Fe" "Zn" "As" "Se"
# PCA analysis
pca_3 <- prcomp(pxrf_MB[3:10])
summary(pca_3)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7 PC8
## Standard deviation     2.1309 1.2538 0.9196 0.70886 0.33529 0.21691 0.03854   0
## Proportion of Variance 0.5958 0.2062 0.1109 0.06592 0.01475 0.00617 0.00019   0
## Cumulative Proportion  0.5958 0.8020 0.9130 0.97888 0.99363 0.99981 1.00000   1
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel MB elements")

autoplot(pca_3, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel MB grouped by area")

PCA(pxrf_MB[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 12 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_MB[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas")
    rect.dendrogram(dend, 4, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##              context                  type     c550      c950
## AH-1        Ad Halom   lime plaster (fine) 3.636174 29.250009
## AH-2        Ad Halom                 floor 1.946490  9.981413
## AH-3        Ad Halom lime plaster (coarse) 4.192486 19.321298
## AH-4        Ad Halom              mudbrick 2.542779  4.414064
## AH-5        Ad Halom              mudbrick 1.471627  7.026742
## AH-6        Ad Halom         floor plaster 1.774556 40.504665
## AH-7        Ad Halom lime plaster (coarse) 4.941322 18.079922
## AH-8        Ad Halom                 floor 2.262738 15.868588
## AH-9        Ad Halom   lime plaster (fine) 1.674261 33.459065
## TI-1    Tell Iztabba              mudbrick 2.056555 42.038495
## TI-2    Tell Iztabba              mudbrick 1.847381 42.608634
## TI-3    Tell Iztabba              mudbrick 3.595561 29.149144
## TI-4    Tell Iztabba              mudbrick 4.892311 29.255047
## TI-5    Tell Iztabba              mudbrick 3.460312 27.795385
## TI-6    Tell Iztabba              mudbrick 3.169395 33.345746
## TI-7    Tell Iztabba              mudbrick 2.381649 40.992167
## TI-8    Tell Iztabba              mudbrick 3.088721 26.134740
## TI-10   Tell Iztabba              mudbrick 2.061447 41.299332
## RLZ-1 Rishon Le'Zion              mudbrick 2.364044  3.727290

TGA dataset:

tga[c(8:9,10:11)]
##              context     type     c550      c950
## AH-4        Ad Halom mudbrick 2.789774  4.486895
## AH-5        Ad Halom mudbrick 1.106870  4.457738
## RLZ-1 Rishon Le'Zion mudbrick 2.604865  4.169125
## TI-1    Tell Iztabba mudbrick 1.893905 42.031107
## TI-2    Tell Iztabba mudbrick 1.707268 42.691722
## TI-3    Tell Iztabba mudbrick 3.102582 30.945771
## TI-4    Tell Iztabba mudbrick 4.991016 30.111368
## TI-5    Tell Iztabba mudbrick 3.279413 28.194114
## TI-6    Tell Iztabba mudbrick 2.922239 34.000000
## TI-7    Tell Iztabba mudbrick 2.051722 41.455573
## TI-8    Tell Iztabba mudbrick 2.931780 29.661181
## TI-10   Tell Iztabba mudbrick 2.014504 41.949927

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (Ad-Halom non-MB samples are included!)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (Only MB)",
        x ="Temperature", y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

MB_table <- matrix(c("LA54/4",  "30 cm x 18 cm", "Top row - weathered",
                  "LA54/4", "25 cm x 18 cm",    "Mid 4th row from the top",
                  "LA54/4", "35 cm x 18 cm (est.)", "B. 8th row from the top",
                  "LA54/4", "45 cm x 18 cm",    "9th row from the top",
                  "LA54/4", "46 cm x 16 cm",    "6th row from the top",
                  "LA59/2", "45 cm x 15 cm (est.)", "Top row - weathered",
                  "LA59/2", "45 cm x 15 cm (est.)", "3rd row from the top",
                  "LA59/2", "33 cm x 15 cm (est.)", "5th row below stones",
                  "LA59/2", "44 cm x 13 cm",    "9th row of the top",
                  "LA59/2", "40 cm x 20 cm",    "B. below disturbance under the marl",
                  "LA54/7", "14 cm x 15 cm (est.)", "(no additional info)"), 
                  ncol=3, byrow=TRUE)
colnames(MB_table) <- c("Context", "Size", "Row")
rownames(MB_table) <- c("PP-9", "PP-10", "PP-11", "PP-12", "PP-13", "PP-14", "PP-15", "PP-16", "PP-17", "PP-18", "PP-19")
MB_table <- as.table(MB_table)

library(knitr)
kable(MB_table)
Context Size Row
PP-9 LA54/4 30 cm x 18 cm Top row - weathered
PP-10 LA54/4 25 cm x 18 cm Mid 4th row from the top
PP-11 LA54/4 35 cm x 18 cm (est.) B. 8th row from the top
PP-12 LA54/4 45 cm x 18 cm 9th row from the top
PP-13 LA54/4 46 cm x 16 cm 6th row from the top
PP-14 LA59/2 45 cm x 15 cm (est.) Top row - weathered
PP-15 LA59/2 45 cm x 15 cm (est.) 3rd row from the top
PP-16 LA59/2 33 cm x 15 cm (est.) 5th row below stones
PP-17 LA59/2 44 cm x 13 cm 9th row of the top
PP-18 LA59/2 40 cm x 20 cm B. below disturbance under the marl
PP-19 LA54/7 14 cm x 15 cm (est.) (no additional info)

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(plot3D); # 3D plots
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use) pxrf_errors: Averaged data including averaged errors, missing and dubious elements emitted

pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- pxrf_errors %>% 
        select(-Al2O3) %>% 
        select(-'Al2O3;Err') %>%   
        select(-CaO) %>% 
        select(-'CaO;Err') %>% 
        select(-SiO2) %>%
        select(-'SiO2;Err') %>%
        select(-Cl) %>%  
        select(-'Cl;Err') %>% 
        select(-K2O) %>% 
        select(-'K2O;Err') %>% 
        select(-Ni) %>% 
        select(-'Ni;Err') %>%   
        select(-P2O5) %>%  
        select(-'P2O5;Err') %>%   
        select(-S) %>% 
        select(-'S;Err') %>%  
        select(-V) %>% 
        select(-'V;Err') %>%   
        select(-As) %>% 
        select(-'As;Err') %>%   
        select(-Rh) %>% 
        select(-'Rh;Err') %>%   
        select(-Ag) %>%   
          select(-'Cr;Err') %>% 
          select(-'Co;Err') %>% 
          select(-'Se;Err') %>% 
          select(-Nb) %>% 
          select(-'Nb;Err') %>% 
          select(-'Mo;Err') %>% 
          select(-'Pd;Err') %>% 
          select(-'Cd;Err') %>% 
          select(-'Sn;Err') %>% 
          select(-'Sb;Err') %>% 
          select(-'Ba;Err') %>% 
          select(-'La;Err') %>% 
          select(-'Ag;Err') %>% 
          select(-'Ce;Err') %>% 
          select(-'Hf;Err') %>% 
          select(-'Ta;Err') %>% 
          select(-'W;Err') %>% 
          select(-'Pt;Err') %>% 
          select(-'Au;Err') %>% 
          select(-'Hg;Err') %>% 
          select(-'Tl;Err') %>% 
          select(-Pb) %>% 
          select(-'Pb;Err') %>% 
          select(-'Bi;Err') %>% 
          select(-'Th;Err') %>% 
          select(-'U;Err')

pxrf_errors[3:22] <- (pxrf_errors[3:22] * 1000)
pxrf_errors[3:22] <- round(pxrf_errors[3:22], digits = 1)

kable(pxrf_errors[3:22])
MgO MgO;Err Ti Ti;Err Cr Mn Mn;Err Fe Fe;Err Co Cu Cu;Err Zn Zn;Err Se Rb Rb;Err Sr Sr;Err Y
PP-10 3703.8 922.4 195.4 3.9 1.5 39.0 2.6 2213.5 11.9 0 11.3 0.7 3.7 0.4 0 1.5 0.4 39.0 0.6 1.4
PP-11 4009.8 1062.1 229.5 4.3 1.0 38.5 2.8 2386.6 13.0 0 5.5 0.5 2.5 0.3 0 1.6 0.3 29.0 0.5 1.1
PP-12 2918.5 719.6 126.3 3.2 0.0 17.8 1.7 1005.8 7.3 0 5.6 0.6 3.2 0.5 0 1.8 0.5 28.3 0.8 0.5
PP-13 4547.7 1093.6 192.8 4.0 0.0 38.9 2.6 2131.2 12.0 0 11.3 0.7 5.0 0.4 0 2.1 0.3 48.4 0.7 1.5
PP-9 4369.6 1200.2 191.5 3.9 0.6 41.8 2.7 2448.1 12.7 0 7.7 0.6 3.9 0.4 0 2.1 0.3 46.8 0.6 1.5
PP-19 4164.6 853.1 96.9 3.3 0.0 11.9 1.7 961.7 7.8 0 4.5 0.6 1.7 0.4 0 0.9 0.4 31.8 0.7 0.3
PP-14 4430.4 1051.3 218.8 4.2 0.0 33.5 2.6 2224.1 12.5 0 6.7 0.6 2.4 0.3 0 1.7 0.3 35.1 0.6 1.4
PP-15 4088.3 1177.5 182.1 3.7 0.0 27.1 2.3 1878.0 11.1 0 7.3 0.7 3.2 0.5 0 1.9 0.4 35.3 0.7 1.6
PP-16 5124.8 1222.8 192.2 3.9 0.0 39.8 2.6 2300.3 12.5 0 9.7 0.7 3.9 0.4 0 1.7 0.4 37.7 0.6 1.7
PP-17 3273.7 960.4 170.9 3.6 0.7 28.6 2.2 1762.4 10.1 0 6.3 0.6 3.4 0.4 0 1.9 0.4 28.8 0.6 1.3
PP-18 3359.0 969.9 161.8 4.0 0.0 20.8 2.2 1491.6 10.5 0 4.7 0.6 1.9 0.3 0 1.2 0.3 17.9 0.5 0.9
PP-1 3378.9 877.4 136.8 3.4 4.4 24.7 2.0 1295.9 8.8 0 5.7 0.6 3.4 0.5 0 2.0 0.4 33.9 0.7 1.4
PP-2 4717.6 1441.6 155.8 3.4 0.7 44.0 2.7 2338.2 12.1 0 5.9 0.5 3.2 0.3 0 2.1 0.3 34.0 0.5 1.7
PP-3 3290.0 816.9 143.1 3.3 0.0 31.7 2.2 1677.0 9.8 0 6.6 0.6 4.5 0.5 0 2.1 0.4 40.0 0.7 1.6
PP-4 3165.8 1123.0 181.1 4.2 0.8 42.7 2.7 2308.1 12.1 0 5.6 0.5 3.8 0.4 0 2.1 0.3 35.9 0.6 1.6
PP-5 4450.8 1561.7 155.4 3.4 0.0 47.0 2.8 2435.1 12.4 0 5.3 0.5 3.7 0.3 0 2.3 0.3 40.9 0.6 1.8
PP-6 3694.8 1296.6 203.2 3.6 0.0 52.2 2.8 2555.2 12.4 0 5.8 0.5 3.1 0.4 0 2.9 0.3 24.0 0.5 2.2
PP-7 4780.3 1956.9 237.1 3.9 9.1 56.3 3.1 3061.0 13.8 0 5.7 0.5 4.0 0.4 0 3.6 0.3 31.0 0.5 2.6
PP-8 4738.8 1249.7 175.4 3.7 7.8 36.3 2.6 1912.4 11.3 0 6.0 0.6 3.1 0.4 0 2.2 0.3 28.4 0.5 2.1

pxrf_all: All “possible” elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all[-c(1:2)]))
##   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr    Mn    Fe 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Mo    Rh    Ag    Ba 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ta    Pb 
##     0     0

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Rh) %>% 
        select(-Ag) %>% 
        select(-MgO) %>% 
        select(-Cu)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_all[-c(1:2)])
##  [1] "MgO"   "Al2O3" "SiO2"  "P2O5"  "S"     "Cl"    "K2O"   "CaO"   "Ti"   
## [10] "V"     "Cr"    "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"    "Mo"    "Rh"    "Ag"    "Ba"    "Ta"   
## [28] "Pb"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no soil)

pxrf_MB <- pxrf_final[c(1:11), ]

pXRF: K means cluster analysis

Only mudbrick samples, K-means cluster analysis with 5 possible clusters

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_MB[3:10], 5), data = pxrf_MB, label = TRUE, label.size = 3)

pXRF: PCA

PCA with only mudbrick samples:

# Elements
colnames(pxrf_MB[3:10])
## [1] "Ti" "Cr" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_1 <- prcomp(pxrf_MB[3:10])
summary(pca_1)
## Importance of components:
##                          PC1    PC2     PC3     PC4     PC5    PC6     PC7
## Standard deviation     2.185 1.1583 0.64362 0.38317 0.36284 0.2064 0.13435
## Proportion of Variance 0.695 0.1952 0.06028 0.02137 0.01916 0.0062 0.00263
## Cumulative Proportion  0.695 0.8902 0.95050 0.97187 0.99102 0.9972 0.99985
##                            PC8
## Standard deviation     0.03224
## Proportion of Variance 0.00015
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

PCA(pxrf_MB[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
#3D plot
scores = as.data.frame(pca_1$x)
scatter3D(scores$PC2, scores$PC3, scores$PC1, 
          xlab = "PC2 (23,6 %)", ylab = "PC3 (5,5 %)", zlab="PC1 (64,2 %)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$PC2, scores$PC3, scores$PC1, labels = rownames(scores),
          add = TRUE, colkey = FALSE, cex = 0.7)

PCA with soil samples:

# Elements
colnames(pxrf_final[3:10])
## [1] "Ti" "Cr" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_3 <- prcomp(pxrf_final[3:10])
summary(pca_3)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     2.1385 1.2463 0.9887 0.6657 0.48162 0.35468 0.29005
## Proportion of Variance 0.5716 0.1941 0.1222 0.0554 0.02899 0.01572 0.01052
## Cumulative Proportion  0.5716 0.7658 0.8880 0.9434 0.97236 0.98808 0.99860
##                           PC8
## Standard deviation     0.1059
## Proportion of Variance 0.0014
## Cumulative Proportion  1.0000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_3, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_3, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

HCA including only MB samples, colored by area:

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_MB[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas")
    rect.dendrogram(dend, 5, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

HCA with soil samples, colored by type:

# HCA dendrogram, samples color coded by type:
dend3 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    type <- as.factor(pxrf_final[, 2])
    n_type <- length(unique(type))
    cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
    col_type <- cols_t[type]
    labels_colors(dend3) <- col_type[order.dendrogram(dend3)]

    plot(dend3, main="HCA with all samples by type")
    rect.dendrogram(dend3, 7, border = "Black", lty = 5)
    legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Only MB samples
grain_MB2 <- grain[c(1:11),]
grain_long3 <- melt(grain_MB2[c(1,14:113)], id = "Sample") 

ggplot(grain_long3, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves - MB only")

Barplots for 5 size fractions:

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Grain size distribution by fractions")

# Stacked barplot
grain_long4 <- melt(grain_MB2[c(1,9:13)], id = "Sample") 

ggplot(grain_long4, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Grain size distribution by fractions - MB only")

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Creating MB only subsets
loi_MB <- subset(loi[9:19, ])

tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ]) # No duplicates

tga_MB1 <- subset(tga[9:22, ]) # Includes some duplicates

# Transforming data to long form (MB only)
loi_MB_long <- loi_MB[c(8,13:14)] 
loi_MB_long <- melt(loi_MB_long, id = "context") 

tga_MB_long <- tga_MB1[c(8,10:11)] 
tga_MB_long <- melt(tga_MB_long, id = "context") 

# Transforming data to long form (Soil included)
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi_MB[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga_MB[c(1,10:11)], value.name = "Temp", id= "Name") 

LOI dataset:

loi[c(8:9,13:14)]
##                 context         type     c550     c950
## PP-1   Hadjuabudllah l1 soil layer 1 5.986894 19.67880
## PP-2   Hadjuabudllah l2 soil layer 2 4.466405 17.59870
## PP-3      Laona soil l1 soil layer 1 5.734589 21.55943
## PP-4      Laona soil l2 soil layer 2 4.323061 17.76702
## PP-5      Laona soil l3 soil layer 3 6.877261 19.61697
## PP-6      Laona soil l1 soil layer 1 5.038672 17.28507
## PP-7      Laona soil l2 soil layer 2 5.582797 17.71488
## PP-8      Laona soil l3 soil layer 3 5.134295 19.63221
## PP-9             LA54:4     mudbrick 7.451505 18.35791
## PP-10            LA54:4     mudbrick 3.557623 14.55842
## PP-11            LA54:4     mudbrick 4.002255 21.24757
## PP-12            LA54:4     mudbrick 2.797378 22.23702
## PP-13            LA54:4     mudbrick 3.156025 21.81049
## PP-14            LA59:2     mudbrick 2.818483 22.65698
## PP-15            LA59:2     mudbrick 3.822946 25.15246
## PP-16            LA59:2     mudbrick 2.365256 21.25664
## PP-17            LA59:2     mudbrick 3.860836 20.58203
## PP-18            LA59:2     mudbrick 2.451871 33.21076
## PP-19            LA54:7     mudbrick 1.739797 33.78112
## PP-1_2 Hadjuabudllah l1 soil layer 1 6.137732 21.18259
## PP-2_2 Hadjuabudllah l2 soil layer 2 4.429557 18.22193
## PP-3_2    Laona soil l1 soil layer 1 5.521160 22.61670
## PP-4_2    Laona soil l2 soil layer 2 4.582459 20.95611
## PP-5_2    Laona soil l3 soil layer 3 6.888629 20.42575
## PP-6_2    Laona soil l1 soil layer 1 5.348806 16.70897
## PP-7_2    Laona soil l2 soil layer 2 5.537857 18.92266
## PP-8_2    Laona soil l3 soil layer 3 5.230428 19.99844

TGA dataset:

tga[c(8:9,10:11)]
##                    context         type     c550     c950
## PP-1      Hadjuabudllah l1 soil layer 1 5.328983 22.05629
## PP-2      Hadjuabudllah l2 soil layer 2 3.798409 18.86880
## PP-3         Laona soil l1 soil layer 1 4.452588 21.57446
## PP-4         Laona soil l2 soil layer 2 4.537622 20.07908
## PP-5         Laona soil l3 soil layer 3 4.657919 23.10036
## PP-6         Laona soil l1 soil layer 1 4.892221 16.89988
## PP-7         Laona soil l2 soil layer 2 4.601116 18.47942
## PP-8         Laona soil l3 soil layer 3 4.525488 20.99294
## PP-9                LA54:4     mudbrick 5.392111 18.62861
## PP-10               LA54:4     mudbrick 2.360845 15.24474
## PP-11               LA54:4     mudbrick 2.773246 24.52314
## PP-11 (2)           LA54:4     mudbrick 3.164817 20.66002
## PP-12               LA54:4     mudbrick 2.734021 21.45383
## PP-13               LA54:4     mudbrick 3.235943 21.34319
## PP-14               LA59:2     mudbrick 3.202329 22.49060
## PP-15               LA59:2     mudbrick 3.508931 22.92802
## PP-16               LA59:2     mudbrick 2.215719 19.75916
## PP-17               LA59:2     mudbrick 3.921377 22.16794
## PP-18               LA59:2     mudbrick 3.234501 31.05334
## PP-19               LA54:7     mudbrick 2.204428 33.79251
## PP-10 (2)           LA54:4     mudbrick 2.226568 14.54839
## PP-9 (2)            LA54:4     mudbrick 5.258741 19.01388
## PP-8 (2)     Laona soil l3 soil layer 3 4.474886 22.50903
## PP-7 (2)     Laona soil l3 soil layer 2 4.739991 19.00483
## PP-6 (2)     Laona soil l1 soil layer 1 4.618851 18.27756
## PP-5 (2)     Laona soil l3 soil layer 3 5.122344 22.59504
## PP-4 (2)     Laona soil l2 soil layer 2 4.559860 20.33749
## PP-3 (2)     Laona soil l1 soil layer 1 4.302926 20.35644

Boxplots

# Colored boxplots: Trad LOI, MB only
ggplot(loi_MB_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (MB only)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI, MB only
ggplot(tga_MB_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (MB only)",
        x ="Temperature", y = "LOI")

# Colored boxplots: Trad LOI, soil samples included
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (soil included)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI, soil samples included
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (soil included)",
        x ="Temperature", y = "LOI")

Biplots

# MB only biplot: Trad LOI
ggplot(loi_MB, 
      aes(c550, c950, label = rownames(loi_MB), color = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI: MB only, organic vs. carbonate content", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# MB only biplot: TGA LOI
ggplot(tga_MB, 
      aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI: MB only, organic vs. carbonate content", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI, soil included - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI, soil included - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI (MB only)", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI (MB only)",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

### Combined results

Combined PCA and HCA: including geochemistry and LOI data

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(plot3D); # 3D plots
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/combo_PP.xlsx", sep=";")

# Transforming remaining NA values to zeros for analysis
pxrf[is.na(pxrf)] <- 0
pxrf[4:16] <- scale(pxrf[4:16])

pxrf_MB <- pxrf[c(9:19), ]
rownames(pxrf_MB) <- pxrf_MB$Sample

PCA & HCA Combided geochemistry + LOI

# PCA analysis
pca_1 <- prcomp(pxrf_MB[4:13])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.5575 1.2647 0.92468 0.76958 0.52297 0.37607 0.32250
## Proportion of Variance 0.6453 0.1578 0.08436 0.05843 0.02698 0.01395 0.01026
## Cumulative Proportion  0.6453 0.8031 0.88749 0.94592 0.97290 0.98686 0.99712
##                            PC8     PC9    PC10
## Standard deviation     0.16348 0.04660 0.01791
## Proportion of Variance 0.00264 0.00021 0.00003
## Cumulative Proportion  0.99975 0.99997 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA (combined results) Palaepaphos elements")

autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA (combined results) Palaepaphos grouped by area")

#3D plot
scores = as.data.frame(pca_1$x)
scatter3D(scores$PC2, scores$PC3, scores$PC1, 
          xlab = "PC2 (21,3 %)", ylab = "PC3 (8,4 %)", zlab="PC1 (60,8 %)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$PC2, scores$PC3, scores$PC1, labels = rownames(scores),
          add = TRUE, colkey = FALSE, cex = 0.7)

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_MB[, 2])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas")
    rect.dendrogram(dend, 6, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all[-c(1:2)]))
##   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr    Mn    Fe 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Co    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Ag    Sn    Ba 
##     0     0     0     0     0     0     0     0     0     0     0     0     0 
##    Ta    Pb    Bi 
##     0     0     0

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Cr)  %>% 
        select(-Ba) %>% 
        select(-MgO) %>% 
        select(-Cu)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
##  [1] "Ti" "Mn" "Fe" "Co" "Zn" "Rb" "Sr" "Y"  "Zr" "Ag" "Sn" "Ta" "Pb" "Bi"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:10])
## [1] "Ti" "Mn" "Fe" "Co" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4    PC5     PC6     PC7
## Standard deviation     2.3586 1.1152 0.85155 0.54983 0.3774 0.13493 0.06450
## Proportion of Variance 0.6954 0.1555 0.09064 0.03779 0.0178 0.00228 0.00052
## Cumulative Proportion  0.6954 0.8508 0.94149 0.97928 0.9971 0.99936 0.99988
##                            PC8
## Standard deviation     0.03094
## Proportion of Variance 0.00012
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Artaxata elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Artaxata grouped by area")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_final[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas (Copper included)")
    rect.dendrogram(dend, 5, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##               context     type     c550     c950
## AA-1          Hill II mudbrick 5.610986 5.756990
## AA-2          Hill II mudbrick 4.711074 7.965070
## AA-3          Hill II mudbrick 4.035368 7.841600
## AA-4    Urartian silo mudbrick 3.080904 7.738517
## AA-5          Phase I mudbrick 2.940399 6.257292
## AA-6          Phase I mudbrick 3.125140 5.507166
## AA-7  Phase II or III mudbrick 4.573474 6.267972
## AA-8  Phase II or III mudbrick 3.769098 7.238074
## AA-9         Phase II mudbrick 3.696483 6.966079
## AA-10        Phase II mudbrick 5.181930 5.580024

TGA dataset:

tga[c(8:9,10:11)]
##                  context     type     c550     c950
## AA-1             Hill II mudbrick 5.811508 5.611934
## AA-2             Hill II mudbrick 4.751337 8.599873
## AA-3             Hill II mudbrick 4.078462 7.997570
## AA-4       Urartian silo mudbrick 3.403442 7.086302
## AA-4 (2)   Urartian silo mudbrick 3.415174 7.167431
## AA-5             Phase I mudbrick 2.799218 6.532557
## AA-6             Phase I mudbrick 3.118409 4.097341
## AA-7     Phase II or III mudbrick 4.395924 6.082521
## AA-8     Phase II or III mudbrick 3.689788 6.722017
## AA-9            Phase II mudbrick 4.417015 6.467449
## AA-10           Phase II mudbrick 4.307988 6.154440

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI",
       color = "Context",
       x ="Temperature", 
       y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI",
       color = "Context",
       x ="Temperature", 
       y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)